# @shuaige : 陈世玉
# @name :china_university.py
# @time :2024/12/6 14:55
import time
from selenium.webdriver.common.by import By

from selenium import webdriver
# Selenium 4 removed both the positional executable-path argument of
# webdriver.Chrome() and the find_element(s)_by_* helpers, so this script
# uses the Service object and the By-based locator API throughout
# (it already used By for the pagination button below).
from selenium.webdriver.chrome.service import Service

path = 'chromedriver.exe'
browser = webdriver.Chrome(service=Service(path))
url = 'https://www.shanghairanking.cn/rankings/bcur/2024'
browser.get(url)

# One implicit wait covers every subsequent find_element(s) call;
# no need to re-set it on each loop iteration.
browser.implicitly_wait(10)

try:
    # 1. Open the output file once and stream every scraped row into it.
    with open('data2.txt', 'w', encoding='utf-8') as f:
        for page in range(1, 18):  # scrape 17 pages of results
            print("正在爬取第" + str(page) + "页")
            # 2. Grab each table column of the current page separately.
            #    Columns: rank, school name, region, school type,
            #    total score, institution level.  (Appending
            #    //div/span[@class='name-cn'] to //td[2] would yield the
            #    Chinese name only.)
            rankings = browser.find_elements(By.XPATH, '//td[1]')
            names = browser.find_elements(By.XPATH, '//td[2]')
            regions = browser.find_elements(By.XPATH, '//td[3]')
            schooltypes = browser.find_elements(By.XPATH, '//td[4]')
            scores = browser.find_elements(By.XPATH, '//td[5]')
            levels = browser.find_elements(By.XPATH, '//td[6]')
            # zip trims to the shortest column, so a partially rendered
            # table never raises an IndexError here.
            for cells in zip(rankings, names, regions, schooltypes, scores, levels):
                row = ' '.join(cell.text for cell in cells)
                f.write(row + '\n')
                print(row)

            # Scroll to the bottom so the pagination control is in view,
            # then advance — but not past the last page we want.
            if page < 17:
                browser.execute_script("window.scrollTo(0, document.body.scrollHeight)")
                time.sleep(2)
                next_button = browser.find_element(By.CSS_SELECTOR, ".ant-pagination-next")
                next_button.click()
                time.sleep(2)
finally:
    # Always release the browser process, even if scraping fails part-way.
    browser.quit()

